Guided section

library(tidyverse)
library(plotly)

1. Read data

# Load the cleaned Gapminder table, drop its first column (presumably a
# row-index column written by a previous export -- confirm against the CSV),
# and give the long indicator columns short names used throughout the report.
data <- read_csv("./gapminder_clean.csv") %>%
    select(-1) %>%
    rename(
        co2em = `CO2 emissions (metric tons per capita)`,
        popden = `Population density (people per sq. km of land area)`,
        lifeExp = `Life expectancy at birth, total (years)`
    )

2. Scatter plot of CO2 emissions and GDP in 1962

# Rows from 1962 that have both a GDP per capita and a CO2 emissions value.
data1962 <- data %>%
    filter(Year == 1962) %>%
    select(gdpPercap, co2em) %>%
    drop_na()

# Static scatter plot: one point per remaining row.
data1962 %>%
    ggplot(aes(x = gdpPercap, y = co2em)) +
    geom_point() +
    labs(
        x = "GDP per capita",
        y = "CO2 emissions per capita (metric tons)"
    )

3. Pearson correlation of CO2 emissions and GDP

# Correlation test between GDP per capita and CO2 emissions per capita in the
# 1962 subset (cor.test's default method, reported below as Pearson's
# product-moment correlation).
cor.test(data1962 %>% pull(gdpPercap), data1962 %>% pull(co2em))
## 
##  Pearson's product-moment correlation
## 
## data:  data1962 %>% pull(gdpPercap) and data1962 %>% pull(co2em)
## t = 25.269, df = 106, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8934697 0.9489792
## sample estimates:
##       cor 
## 0.9260817

4. Year of strongest correlation between CO2 emissions and GDP

# Correlation between GDP per capita and CO2 emissions for every year, using
# only rows where both values are present.
corrs <- data %>%
    select(Year, gdpPercap, co2em) %>%
    drop_na() %>%
    group_by(Year) %>%
    summarise(correlation = cor(gdpPercap, co2em))

# Keep the single row with the highest correlation so that `maxi$Year` and
# `maxi$correlation` describe the same year. Taking max() of each column
# independently would pair the latest year in the data with the highest
# correlation, whichever year that correlation actually belongs to.
# which.max() skips NA correlations and returns the first maximum on ties.
maxi <- corrs %>%
    slice(which.max(correlation))

The strongest correlation is 0.9387918 in the year 2007.

5. Interactive scatter plot of CO2 emissions and GDP

# Complete rows for the year stored in `maxi$Year`, keeping only the columns
# the plot needs.
max_em_year_data <- data %>%
    filter(Year == maxi$Year) %>%
    select(gdpPercap, co2em, pop, continent, `Country Name`) %>%
    drop_na()

# Bubble chart: point size encodes population, colour encodes continent.
# The `text` aesthetic is not drawn by ggplot2; it only feeds the hover
# tooltip requested from ggplotly() below.
fig <- max_em_year_data %>%
    ggplot() +
    geom_point(aes(
        x = gdpPercap,
        y = co2em,
        size = pop,
        color = continent,
        text = paste("Country: ", `Country Name`,
             "\nGDP: ", gdpPercap,
             "\nCO2 emissions: ", co2em))) +
    labs(
        x = "GDP per capita",
        y = "CO2 emissions per capita (metric tons)",
        title = str_glue("GDP vs CO2 emissions per capita in ", maxi$Year)
    )
ggplotly(fig, tooltip = "text")

Open section

1. Relationship between continent and energy use

2. Difference of imports between Europe and Asia

3. Country with the highest average population density in 1962-2007

# Mean population density per country over all available years (missing
# values ignored), sorted from most to least dense.
data_popden <- data %>%
    select(`Country Name`, popden) %>%
    group_by(`Country Name`) %>%
    summarise(avg_popden = mean(popden, na.rm = TRUE)) %>%
    arrange(desc(avg_popden))

# Number of countries to display in the bar chart.
num_countries_shown <- 20

# Horizontal bars for the densest countries, ordered by density on the y axis.
data_popden %>%
    head(n = num_countries_shown) %>%
    ggplot(aes(x = avg_popden, y = reorder(`Country Name`, avg_popden))) +
    geom_col() +
    labs(
        x = "Average population density (people per sq. km of land)",
        y = "",
        title = str_glue(num_countries_shown, " most population dense countries 1962-2007")
    )

The country with the highest average population density between 1962 and 2007 is Macao SAR, China.

4. Country with the greatest increase in life expectancy at birth since 1962

# Life expectancy per country and year, limited to countries that have a 1962
# value. `lifeExpSince1962` is the difference between each year's value and
# that country's 1962 value.
data_lifeExp <- data %>%
    select(`Country Name`, Year, lifeExp) %>%
    drop_na() %>%
    group_by(`Country Name`) %>%
    filter(any(Year == 1962)) %>%
    mutate(lifeExpSince1962 = lifeExp - lifeExp[Year == 1962]) %>%
    ungroup()

# Total change from 1962 to 2007 per country, largest first. The 2007 row of
# `lifeExpSince1962` already holds that difference, so it is read directly.
# Countries without a 2007 value simply drop out here instead of producing a
# zero-length `lifeExp[Year == 2007]` that makes mutate()/filter() fail.
countries_highest <- data_lifeExp %>%
    filter(Year == 2007) %>%
    select(`Country Name`, lifeExpChange = lifeExpSince1962) %>%
    arrange(desc(lifeExpChange))

# Smallest change among the ten best countries; ties at the cutoff are kept.
cutoff_lifeExpChange <- min(head(countries_highest, n = 10)$lifeExpChange)

# Full time series for the countries at or above the cutoff (ungrouped).
data_lifeExpTop <- data_lifeExp %>%
    semi_join(
        countries_highest %>%
            filter(lifeExpChange >= cutoff_lifeExpChange),
        by = "Country Name"
    )

# One line per top country showing the change in life expectancy relative to
# 1962, rendered as an interactive plotly figure.
fig <- data_lifeExpTop %>%
    ggplot() +
    geom_line(aes(
        x = Year,
        y = lifeExpSince1962,
        color = `Country Name`
    )) +
    labs(y = "Life expectancy at birth since 1962 (years)")
ggplotly(fig)

The Maldives had the greatest increase in life expectancy at birth from 1962 to 2007.